Inés Sentís
Date of execution
Sys.Date()
Normalize data and create embeddings for the each time point fraction
sample <- "41BBctr"
suppressMessages(suppressWarnings({
library(Seurat)
library(tidyverse)
library(grid)
library(gridExtra)
library(ggplot2)
library(scater)
library(scran)
}))
#here::dr_here(show_reason = TRUE)
source(here::here("SCGRES_124_125/sc_analysis/misc/paths.R"))
source(here::here("SCGRES_124_125/sc_analysis/misc/variables.R"))
"{clust}/{plt_dir}" %>%
glue::glue() %>%
here::here() %>%
dir.create(path = .,
showWarnings = FALSE,
recursive = TRUE)
"{clust}/{robj_dir}" %>%
glue::glue() %>%
here::here() %>%
dir.create(path = .,
showWarnings = FALSE,
recursive = TRUE)
set.seed(0)
seurat_obj <- readRDS(here::here(glue::glue("{qc}/{robj_dir}/clean_combined_object_{sample}.rds")))
var_name <- paste0("comp_", sample)
dcomp <- get(var_name)
seurat_obj <- NormalizeData(
seurat_obj,
normalization.method = "LogNormalize",
scale.factor = 1e4
)
sce <- as.SingleCellExperiment(seurat_obj)
sce
class: SingleCellExperiment dim: 25804 9024 metadata(0): assays(2): counts logcounts rownames(25804): AL627309.1 AL627309.5 ... AC233755.2 AC007325.4 rowData names(0): colnames(9024): AAACCTGAGAACTGTA-1 AAACCTGAGAATGTTG-1 ... TTTGTCATCGTATCAG-1 TTTGTCATCTTGTCAT-1 colData names(12): orig.ident nCount_RNA ... doublet_pred ident reducedDimNames(0): mainExpName: RNA altExpNames(0):
gene_var <- modelGeneVar(sce)
tops <- gene_var %>%
as.data.frame() %>%
arrange(desc(total)) %>%
head(n=20)
gene_var %>%
as.data.frame() %>%
ggplot(aes(mean, total)) +
geom_point() +
geom_line(aes(y = tech), colour = "dodgerblue", size = 1) +
labs(x = "Mean of log-expression", y = "Variance of log-expression")+
geom_text(data=tops, aes(mean,total,label=rownames(tops)))
Warning message:
“Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.”
gene_var %>%
as.data.frame() %>%
arrange(desc(total)) %>%
head(n=20)
| mean | total | tech | bio | p.value | FDR | |
|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| GNLY | 1.0547396 | 2.9613747 | 0.4196620 | 2.5417127 | 1.141553e-229 | 1.453025e-225 |
| CCL5 | 3.4858385 | 2.4630905 | 0.1540516 | 2.3090389 | 0.000000e+00 | 0.000000e+00 |
| AL138963.4 | 1.8666607 | 1.2085101 | 0.3230586 | 0.8854515 | 8.688159e-49 | 1.053212e-45 |
| IL7R | 1.9797260 | 1.1800881 | 0.3049524 | 0.8751357 | 2.772235e-53 | 4.704852e-50 |
| LGALS1 | 1.4299464 | 1.1769585 | 0.3909182 | 0.7860403 | 3.488036e-27 | 1.675376e-24 |
| GZMA | 1.9641542 | 1.1373779 | 0.3074405 | 0.8299374 | 2.173738e-47 | 2.405949e-44 |
| NKG7 | 0.9782712 | 1.1163200 | 0.4185123 | 0.6978078 | 2.752163e-19 | 8.757728e-17 |
| SELL | 0.9169472 | 1.0281926 | 0.4152971 | 0.6128956 | 1.650685e-15 | 4.320243e-13 |
| ZNF683 | 0.6511598 | 0.9853704 | 0.3723036 | 0.6130668 | 7.394079e-19 | 2.267844e-16 |
| HIST1H1E | 1.2620529 | 0.9767506 | 0.4101296 | 0.5666210 | 8.164623e-14 | 1.960819e-11 |
| HOPX | 1.1826766 | 0.9155286 | 0.4157280 | 0.4998006 | 6.884647e-11 | 1.436578e-08 |
| CD74 | 2.0285585 | 0.8478692 | 0.2973072 | 0.5505619 | 2.382528e-23 | 9.052539e-21 |
| TUBA1B | 0.6455332 | 0.8426859 | 0.3708049 | 0.4718810 | 5.451945e-12 | 1.239198e-09 |
| HIST1H4C | 1.4770233 | 0.8258566 | 0.3842241 | 0.4416324 | 4.219307e-10 | 8.262377e-08 |
| CST7 | 0.7945691 | 0.8258029 | 0.4012391 | 0.4245638 | 8.063726e-09 | 1.446211e-06 |
| TRBC1 | 0.9372765 | 0.7950082 | 0.4166144 | 0.3783938 | 6.204811e-07 | 9.291523e-05 |
| LRRN3 | 1.1816861 | 0.7919142 | 0.4157834 | 0.3761308 | 6.839808e-07 | 1.012331e-04 |
| HIST1H1D | 0.9069261 | 0.7903610 | 0.4145503 | 0.3758107 | 6.496574e-07 | 9.671537e-05 |
| RIPOR2 | 1.6231781 | 0.7666858 | 0.3619318 | 0.4047540 | 1.183178e-09 | 2.247774e-07 |
| TUBB | 1.1795813 | 0.7570346 | 0.4158998 | 0.3411349 | 5.960336e-06 | 7.624737e-04 |
hvgs <- getTopHVGs(gene_var,fdr.threshold = 0.05)
length(hvgs)
hvgs <- hvgs[!grepl("^TR[AB][VJC]", hvgs)]
length(hvgs)
seurat_obj <- seurat_obj %>%
ScaleData(features=hvgs) %>%
RunPCA(features=hvgs)
Centering and scaling data matrix PC_ 1 Positive: LTB, IL7R, FTL, PASK, EEF1B2, RIPOR2, CCR7, SELL, TCF7, NELL2 MTRNR2L12, CD52, AL138963.4, TMSB10, LRRN3, MALAT1, S100B, S100A4, KRT1, RPLP0 KIF5C, HPGDS, KLRB1, LINC02315, TRDV3, LINC02109, KIR2DL3, FCRL3, TRGV11, AC005699.1 Negative: UBE2C, TOP2A, ASPM, KIFC1, CDK1, RRM2, HJURP, CKAP2L, DLGAP5, KNL1 GTSE1, BIRC5, KIF15, CDCA8, NUSAP1, TPX2, HMMR, HIST1H3B, CENPF, MKI67 CCNB2, KIF14, STMN1, PLK1, HIST1H1B, CDC20, HIST1H3C, PCLAF, TUBA1B, HIST1H3G PC_ 2 Positive: EEF1B2, IL7R, RPLP0, SELL, CCR7, TCF7, IFITM1, RIPOR2, PASK, LTB LRRN3, AL138963.4, IFITM2, KRT1, TMSB10, PDE3B, TUBA1B, RRM2, HMMR, KIFC1 HIST1H3B, UBE2C, HJURP, HIST1H4C, CDK1, GTSE1, HIST1H3G, CDCA8, TUBB, PLK1 Negative: CST7, NKG7, GNLY, CCL4, ALOX5AP, HLA-DRB1, GZMH, CCL5, ZNF683, GZMB CD2, RCBTB2, CXCR3, CD74, HLA-DRA, LINC02694, HLA-DQA1, JAML, GZMK, CCL4L2 AOAH, LSP1, PPP1R14B, CXCR6, AC013652.1, KLRK1, IL32, CTSW, GZMA, HOPX PC_ 3 Positive: PDE7B, KRT1, HPGDS, LINC02694, RTKN2, AC013652.1, GZMH, MALAT1, CST7, PPP1R14B ALOX5AP, PDE3B, CCL4, ACTG1, GAPDH, PLEK, CCL4L2, ACTB, LSP1, CYP1B1 CCL3, HLA-DQA1, KLRB1, FOXP3, RCBTB2, GZMB, TRDC, HLA-DRA, HIST1H1C, HIST1H2AG Negative: HOPX, CTSW, KLRC4, CCL5, KLRK1, LTB, CD52, NELL2, IFITM2, CD8B RIPOR2, LINC02446, SPINK2, SELL, XCL1, IFITM1, CXCR3, PASK, CCR7, TMSB10 S100A4, CD7, AOAH, GZMA, ZNF683, S100B, LRRN3, NKG7, LGALS1, TCF7 PC_ 4 Positive: IL32, CD7, SPINK2, XCL1, IFITM2, GAPDH, ACTG1, LSP1, FTL, JAML IFITM1, ACTB, LGALS1, CXCR6, KRT1, KRT86, HPGDS, S100A4, HIST1H1C, PDE7B RPLP0, RTKN2, KLRB1, CYP1B1, HMGN2, FOXP3, HOPX, CD2, HIST1H1E, PDE3B Negative: RIPOR2, TMSB10, SELL, GZMH, GZMK, GZMA, CCL4, PLEK, PPP1R14B, HLA-DRB1 CCL4L2, CCR7, HLA-DRA, HLA-DQA1, PASK, CCL3, GZMB, CD52, CD8B, LINC02694 GNLY, LTB, AC013652.1, MALAT1, IL7R, KLRC1, MT-ND6, CD74, NKG7, KLRK1 PC_ 5 Positive: CDC20, CCNB1, LGALS1, CCNB2, PTTG1, PLK1, TMSB10, CD52, HMMR, KIF14 S100A4, TRDC, DLGAP5, BIRC5, CRIP1, CENPE, IFITM2, ASPM, GTSE1, RTKN2 IFITM1, TPX2, LTB, CDCA8, CENPF, PDE7B, GZMA, TYROBP, UBE2C, KLRD1 Negative: HIST1H1E, HIST1H1C, HIST1H1D, HIST1H2AL, HIST1H2AI, HIST1H2AH, HIST1H3C, HIST1H2AB, HIST1H3D, HIST2H2AB HIST1H2AG, HIST1H3F, NELL2, HIST1H1B, HIST1H3B, AL138963.4, CD8B, KIF5C, HIST1H3G, AOAH MT-ND6, MALAT1, LRRN3, KLRC4, PDE3B, RRM2, TCF7, HIST1H4C, SPINK2, CCR7
options(repr.plot.width = 8, repr.plot.height = 6)
ElbowPlot(seurat_obj, n=50)
options(repr.plot.width = 14, repr.plot.height = 6)
FeaturePlot(object = seurat_obj, reduction = "pca",
features = c("nCount_RNA","nFeature_RNA"), order=T)
options(repr.plot.width = 16, repr.plot.height = 16, warn=-1,verbose = FALSE)
genes = c("CD3E", "CD3D",
"CD4", "CD8A","CD8B","FOXP3",
"TOP2A", "IL7R","SELL")
FeaturePlot(seurat_obj, reduction = "pca",
feature=genes, order = F, ncol=3)
options(repr.plot.width = 10, repr.plot.height = 6, warn=-1,verbose = FALSE)
VizDimLoadings(seurat_obj, dims = 1:2, reduction = "pca")
seurat_obj <- RunUMAP(
seurat_obj,
dims = dcomp,
reduction = "pca",
reduction.name = "umap",
reduction.key = "UMAP_"
)
15:56:45 UMAP embedding parameters a = 0.9922 b = 1.112 15:56:45 Read 9024 rows and found 12 numeric columns 15:56:45 Using Annoy for neighbor search, n_neighbors = 30 15:56:45 Building Annoy index with metric = cosine, n_trees = 50 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | 15:56:46 Writing NN index file to temp file /scratch_tmp/33937423/RtmpozxZ8i/file3ef0b3b066ef0 15:56:46 Searching Annoy index using 1 thread, search_k = 3000 15:56:49 Annoy recall = 100% 15:56:50 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 15:56:51 Initializing from normalized Laplacian + noise (using irlba) 15:56:51 Commencing optimization for 500 epochs, with 372032 positive edges 15:57:16 Optimization finished
options(repr.plot.width = 8, repr.plot.height = 6, warn=-1,verbose = FALSE)
DimPlot(
seurat_obj,
reduction = "umap",
pt.size = 0.1
) + ggtitle('UMAP') +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
seurat_obj <- CellCycleScoring(seurat_obj, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
cat_vars <-c("Phase")
con_vars <- c("nCount_RNA", "nFeature_RNA", "pct_mt", "percent.ribo",
"doublet_score", "PTPRC", "CD4", "CD8A", "CD8B", "TOP2A")
vars <- c(cat_vars, con_vars)
# compute plots
list_plots <- lapply(vars, function(var){
if (var %in% cat_vars) {
p <- DimPlot(seurat_obj, reduction = "umap", group.by=var)
} else {
p <- FeaturePlot(seurat_obj, reduction = "umap", feature=var, order = TRUE)
}
return(p)
})
options(repr.plot.width = 16, repr.plot.height = 12, warn=-1,verbose = FALSE)
# show plots
cp <- cowplot::plot_grid(plotlist = list_plots,
align = "hv",
axis = "trbl",
ncol = 4,
nrow = 3)
cp
options(repr.plot.width = 15, repr.plot.height = 22, warn=-1,verbose = FALSE)
genes = c("PTPRC","SMARCB1","TOP2A", "MKI67","CD3E", "CD3D",
"CD4", "CD8A","CD8B", "TRDC","TRGV1","TRGV2", "FOXP3")
FeaturePlot(seurat_obj, reduction = "umap",
feature=genes, order = T, ncol=3)
saveRDS(seurat_obj, here::here(glue::glue("{clust}/{robj_dir}/dimred_combined_object_{sample}.rds")))
sessionInfo()
R version 4.1.1 (2021-08-10) Platform: x86_64-conda-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core) Matrix products: default BLAS/LAPACK: /home/groups/singlecell/isentis/conda_envs/ines_r4.1.1c/lib/libopenblasp-r0.3.24.so locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=es_ES.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] stats4 grid stats graphics grDevices utils datasets [8] methods base other attached packages: [1] scran_1.22.1 scater_1.22.0 [3] scuttle_1.4.0 SingleCellExperiment_1.16.0 [5] SummarizedExperiment_1.24.0 Biobase_2.54.0 [7] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 [9] IRanges_2.28.0 S4Vectors_0.32.4 [11] BiocGenerics_0.40.0 MatrixGenerics_1.6.0 [13] matrixStats_1.0.0 gridExtra_2.3 [15] lubridate_1.9.3 forcats_1.0.0 [17] stringr_1.5.0 dplyr_1.1.3 [19] purrr_1.0.2 readr_2.1.4 [21] tidyr_1.3.0 tibble_3.2.1 [23] ggplot2_3.4.4 tidyverse_2.0.0 [25] SeuratObject_4.1.4 Seurat_4.0.5 loaded via a namespace (and not attached): [1] uuid_1.1-1 plyr_1.8.9 [3] igraph_1.3.4 repr_1.1.6 [5] lazyeval_0.2.2 sp_2.1-0 [7] splines_4.1.1 BiocParallel_1.28.3 [9] listenv_0.9.0 scattermore_1.2 [11] digest_0.6.33 htmltools_0.5.6.1 [13] viridis_0.6.4 fansi_1.0.5 [15] magrittr_2.0.3 ScaledMatrix_1.2.0 [17] tensor_1.5 cluster_2.1.4 [19] ROCR_1.0-11 limma_3.50.3 [21] tzdb_0.4.0 globals_0.16.2 [23] timechange_0.2.0 spatstat.sparse_3.0-1 [25] colorspace_2.1-0 ggrepel_0.9.4 [27] crayon_1.5.2 RCurl_1.98-1.12 [29] jsonlite_1.8.7 progressr_0.14.0 [31] spatstat.data_3.0-1 survival_3.5-7 [33] zoo_1.8-12 glue_1.6.2 [35] polyclip_1.10-4 gtable_0.3.4 [37] zlibbioc_1.40.0 XVector_0.34.0 [39] leiden_0.4.3 DelayedArray_0.20.0 [41] BiocSingular_1.10.0 future.apply_1.11.0 [43] abind_1.4-5 scales_1.2.1 [45] edgeR_3.36.0 spatstat.random_3.1-5 [47] miniUI_0.1.1.1 Rcpp_1.0.10 [49] viridisLite_0.4.2 xtable_1.8-4 [51] dqrng_0.3.1 reticulate_1.34.0 [53] spatstat.core_2.4-2 rsvd_1.0.5 [55] metapod_1.2.0 htmlwidgets_1.6.2 [57] httr_1.4.7 RColorBrewer_1.1-3 [59] ellipsis_0.3.2 ica_1.0-3 [61] farver_2.1.1 pkgconfig_2.0.3 [63] uwot_0.1.16 deldir_1.0-9 [65] here_1.0.1 locfit_1.5-9.8 [67] utf8_1.2.3 labeling_0.4.3 [69] tidyselect_1.2.0 rlang_1.1.1 [71] reshape2_1.4.4 later_1.3.1 [73] munsell_0.5.0 tools_4.1.1 [75] cli_3.6.1 generics_0.1.3 [77] ggridges_0.5.4 evaluate_0.22 [79] fastmap_1.1.1 goftest_1.2-3 [81] fitdistrplus_1.1-11 RANN_2.6.1 [83] sparseMatrixStats_1.6.0 pbapply_1.7-2 [85] future_1.33.0 nlme_3.1-162 [87] mime_0.12 compiler_4.1.1 [89] beeswarm_0.4.0 plotly_4.10.2 [91] png_0.1-8 spatstat.utils_3.0-3 [93] statmod_1.5.0 stringi_1.7.12 [95] bluster_1.4.0 lattice_0.21-8 [97] IRdisplay_1.1 Matrix_1.6-1.1 [99] vctrs_0.6.4 pillar_1.9.0 [101] lifecycle_1.0.3 spatstat.geom_3.2-5 [103] lmtest_0.9-40 BiocNeighbors_1.12.0 [105] RcppAnnoy_0.0.21 data.table_1.14.8 [107] cowplot_1.1.1 bitops_1.0-7 [109] irlba_2.3.5.1 httpuv_1.6.11 [111] patchwork_1.1.3 R6_2.5.1 [113] promises_1.2.1 KernSmooth_2.23-22 [115] vipor_0.4.7 parallelly_1.36.0 [117] codetools_0.2-19 MASS_7.3-60 [119] rprojroot_2.0.3 withr_2.5.1 [121] sctransform_0.4.0 GenomeInfoDbData_1.2.7 [123] mgcv_1.8-42 parallel_4.1.1 [125] hms_1.1.3 beachmat_2.10.0 [127] rpart_4.1.19 IRkernel_1.3.2.9000 [129] DelayedMatrixStats_1.16.0 Cairo_1.6-2 [131] Rtsne_0.16 pbdZMQ_0.3-10 [133] shiny_1.7.5.1 base64enc_0.1-3 [135] ggbeeswarm_0.7.2